/* Computes the federal marriage tax penalty/subsidy using the 2018 sample
   files (married_sample2018 merged with unmarried_sample2018).
   NOTE(review): the original header said "for 2018 only", but the code below
   processes tax years 1991 through 2018 - confirm what "2018" refers to. */

//cap log close
// NOTE(review): legacy numeric form of -set more-; presumably disables the
// --more-- pause so the script runs unattended - confirm.
set more 1
clear

// Make sure the working directory contains the dta\ (and do\) folders used below.
// Paths are relative and use Windows-style backslashes.
// #delimit cr: each command ends at the end of its line.
#d cr 

// Use the married-filing calculations as the master dataset
use "dta\Married_Files\married_sample2018.dta", clear

// Sort by family identifier.
// NOTE(review): this sorts on uniqfm, but the merge below keys on uniqnewfm.
// The 1:1 merge syntax does not require pre-sorted data, so this sort only
// affects observation order - confirm which identifier was intended.
sort uniqfm

// Merge in the unmarried-filing calculations, one record per uniqnewfm.
// NOTE(review): the _merge result variable is never inspected, so unmatched
// records from either file are kept unless removed by the filter below.
merge 1:1 uniqnewfm using "dta\Unmarried_Files\unmarried_sample2018.dta"

// Keep only observations with marst==1 (presumably married couples - confirm coding)
keep if marst==1

// Inflation factors: BLS CPI from June of each tax year to June 2020.
// The list is in tax-year order starting at 1991 (one factor per year,
// 28 entries through 2018). Years outside that range keep a missing inflator.
local cpi_factors ///
	1.90236 1.84414 1.79433 1.74596 1.69902 /// 1991-1995
	1.65032 1.61434 1.58763 1.55430 1.49943 /// 1996-2000
	1.45972 1.43865 1.40892 1.36801 1.32600 /// 2001-2005
	1.27322 1.24389 1.17792 1.20316 1.18848 /// 2006-2010
	1.14686 1.13093 1.10918 1.08752 1.08568 /// 2011-2015
	1.07677 1.05848 1.02815

gen inflator = .
local cpi_year 1991
foreach cpi_factor of local cpi_factors {
	replace inflator = `cpi_factor' if year == `cpi_year'
	local ++cpi_year
}


// Drop couples whose combined inflation-adjusted wages exceed $1,000,000.
// Stata treats missing as larger than any number, so rows where pwages,
// swages or inflator is missing are dropped by this condition as well.
drop if (pwages + swages) * inflator > 1000000

// Federal liability filing as married minus liability filing as unmarried,
// expressed in inflation-adjusted dollars (positive = penalty, negative = bonus).
gen feddiff1 = inflator * (m_Fliability - un_Fliability)

// De minimis rule: differences strictly smaller than $10 in absolute value
// are set to zero. Missing differences stay missing.
replace feddiff1 = 0 if abs(feddiff1) < 10




// Race dummies for the three couple-level race definitions race1-race3.
// Per the original notes: race1 requires both spouses to be of the same race;
// race2 groups "both black or both part black" and "both white or 3/4 white
// among the two" (same for other races); race3 includes other combinations.
// In each definition code 1 is the reference group, 2 = black, 3 = other.
// Any other (or missing) race code leaves both dummies missing.
forvalues k = 1/3 {
	gen black`k' = (race`k' == 2) if inlist(race`k', 1, 2, 3)
	gen oth`k' = (race`k' == 3) if inlist(race`k', 1, 2, 3)
}


// Hispanic dummies, each compared against the remaining couples.
// hisp1: 1 only when hisp==1 (fully Hispanic couple); 0 for every other
// value of hisp, including missing.
gen hisp1 = (hisp == 1)


// hisp2: 1 when hisp is 1 or 2 (at least one spouse Hispanic), 0 when
// hisp==0, and missing for any other value of hisp.
gen hisp2 = inlist(hisp, 1, 2) if inlist(hisp, 0, 1, 2)




// Tax-year range covered by the year-by-year regressions
local endyear 2018
local startyear 1991

// Outcome variable(s) to analyse
local penalties "feddiff1"

// Regressor lists x1-x3: the black/other dummies for each race definition
forvalues k = 1/3 {
	local x`k' "black`k' oth`k'"
}


// Couple's combined wages in inflation-adjusted dollars
gen real_income=(pwages+swages)*inflator

// Income bins: 0 = zero income, 1-8 = $20,000-wide bins up to $160,000,
// 9 = above $160,000. Negative or missing income leaves inc_group missing.
gen inc_group=0 if real_income==0
replace inc_group=1 if real_income>0 & real_income<=20000
replace inc_group=2 if real_income>20000 & real_income<=40000
replace inc_group=3 if real_income>40000 & real_income<=60000
replace inc_group=4 if real_income>60000 & real_income<=80000
replace inc_group=5 if real_income>80000 & real_income<=100000
replace inc_group=6 if real_income>100000 & real_income<=120000
replace inc_group=7 if real_income>120000 & real_income<=140000
replace inc_group=8 if real_income>140000 & real_income<=160000
// Stata orders missing above every number, so the open-ended top bin must
// exclude missing explicitly or missing incomes would be coded as group 9.
replace inc_group=9 if real_income>160000 & !missing(real_income)


// Income-split dummies: 0/1 indicators for the quintile of incratio
// (<=0.2, 0.2-0.4, 0.4-0.6, 0.6-0.8, >0.8).
gen split_group1=0 
replace split_group1=1 if incratio<=.2

gen split_group2=0 
replace split_group2=1 if incratio>0.2 & incratio<=.4

gen split_group3=0 
replace split_group3=1 if incratio>0.4 & incratio<=.6

gen split_group4=0 
replace split_group4=1 if incratio>0.6 & incratio<=.8


// Fix: a bare "incratio>0.8" is true for missing incratio, which silently
// placed couples with an undefined income split in the top group. With the
// guard, a missing incratio is 0 in all five dummies.
gen split_group5=0 
replace split_group5=1 if incratio>0.8 & !missing(incratio)


// Year-by-year race gaps in each outcome, for each of the three race definitions.
// For every year a weighted OLS of the outcome on the black/other dummies is
// run on couples with positive real income (robust standard errors). Results
// are written back to that year's observations:
//   white_*    constant (reference group mean)
//   black_*    coefficient on the black dummy
//   other_*    coefficient on the other-race dummy
//   raceblk_*  constant + black coefficient
//   raceoth_*  constant + other coefficient
//   ub5/lb5 and ub10/lb10 (bl = black, ot = other): upper/lower bounds of the
//   95% and 90% confidence intervals around the respective coefficient.
// The covariance matrix (vv_*), the coefficient variances (v_*) and the CI
// half-widths (bound5_*, bound10_*) are also left in memory, one per
// outcome/year/definition.
forvalues def = 1/3 {
	foreach dep of local penalties {
		// Create the result variables, all missing until filled per year
		foreach stub in black white other raceblk raceoth ub5bl lb5bl ub10bl lb10bl ub5ot lb5ot ub10ot lb10ot {
			gen `stub'_`dep'`def' = .
		}
		forvalues yr = `startyear'/`endyear' {
			reg `dep' `x`def'' [pweight=asecfwt] if year == `yr' & real_income > 0, robust

			// Point estimates
			replace black_`dep'`def' = _b[black`def'] if year == `yr'
			replace other_`dep'`def' = _b[oth`def'] if year == `yr'
			replace white_`dep'`def' = _b[_cons] if year == `yr'

			// Group levels: reference mean plus the group's gap
			replace raceblk_`dep'`def' = (white_`dep'`def' + black_`dep'`def') if year == `yr'
			replace raceoth_`dep'`def' = (white_`dep'`def' + other_`dep'`def') if year == `yr'

			// CI half-widths from the diagonal of e(V):
			// position 1 = black dummy, position 2 = other dummy
			matrix vv_`dep'`yr'`def' = e(V)
			forvalues k = 1/2 {
				scalar v_`dep'`yr'`def'`k' = vv_`dep'`yr'`def'[`k',`k']
				scalar bound5_`dep'`yr'`def'`k' = sqrt(v_`dep'`yr'`def'`k')*invttail(e(df_r),0.5*(1-95/100))
				scalar bound10_`dep'`yr'`def'`k' = sqrt(v_`dep'`yr'`def'`k')*invttail(e(df_r),0.5*(1-90/100))
			}

			// Confidence bounds at the 5% and 10% significance levels
			foreach lvl in 5 10 {
				replace ub`lvl'bl_`dep'`def' = (_b[black`def'] + bound`lvl'_`dep'`yr'`def'1) if year == `yr'
				replace lb`lvl'bl_`dep'`def' = (_b[black`def'] - bound`lvl'_`dep'`yr'`def'1) if year == `yr'
				replace ub`lvl'ot_`dep'`def' = (_b[oth`def'] + bound`lvl'_`dep'`yr'`def'2) if year == `yr'
				replace lb`lvl'ot_`dep'`def' = (_b[oth`def'] - bound`lvl'_`dep'`yr'`def'2) if year == `yr'
			}
		}
	}
}



// Hispanic vs. non-Hispanic gaps, year by year.
// For each outcome and each Hispanic definition (hisp1, hisp2) a weighted OLS
// of the outcome on the Hispanic dummy is run per year on couples with
// positive real income (robust standard errors). Results are written back to
// that year's observations:
//   nhsp_*_m  constant (mean for the dummy==0 group)
//   df_*_m    coefficient on the Hispanic dummy (the gap; "df" = difference)
//   hsp_*_m   constant + coefficient (mean for the dummy==1 group)
//   ub5/lb5 and ub10/lb10: upper/lower bounds of the 95% and 90% confidence
//   intervals around the coefficient.
// The covariance matrix (vv_*), the coefficient variance (v_*) and the CI
// half-widths (bound5_*, bound10_*) are kept in memory per outcome/dummy/year.

local hispanics hisp1 hisp2

foreach i in `penalties' {
foreach var in `hispanics'{
	gen nhsp_`i'_`var'_m=.
	gen hsp_`i'_`var'_m=.
	gen df_`i'_`var'_m=.
	gen ub5_`i'_`var'_m=.
	gen lb5_`i'_`var'_m=.
	gen ub10_`i'_`var'_m=.
	gen lb10_`i'_`var'_m=.
		forvalues t=`startyear'/`endyear'{
			reg `i' `var' [pweight=asecfwt] if year==`t' & real_income>0, robust
			replace nhsp_`i'_`var'_m= _b[_cons] if year==`t' 
			replace df_`i'_`var'_m= _b[`var'] if year==`t'
			// Fix: use the full variable names (with the _m suffix). The
			// original omitted the suffix and only worked through Stata's
			// variable-abbreviation feature, which breaks under
			// -set varabbrev off- or if another variable shares the prefix.
			replace hsp_`i'_`var'_m= nhsp_`i'_`var'_m + df_`i'_`var'_m if year==`t' 
			// The Hispanic dummy is the only regressor, so its variance is e(V)[1,1]
			matrix vv_`i'_`var'_`t'_m=e(V)
			scalar v_`i'_`var'_`t'_m=vv_`i'_`var'_`t'_m[1,1]
			scalar bound5_`i'_`var'_`t'_m=sqrt(v_`i'_`var'_`t'_m)*invttail(e(df_r),0.5*(1-95/100))
			scalar bound10_`i'_`var'_`t'_m=sqrt(v_`i'_`var'_`t'_m)*invttail(e(df_r),0.5*(1-90/100))
			replace ub5_`i'_`var'_m=(_b[`var']+bound5_`i'_`var'_`t'_m) if year==`t'
			replace lb5_`i'_`var'_m=(_b[`var']-bound5_`i'_`var'_`t'_m) if year==`t'
			replace ub10_`i'_`var'_m=(_b[`var']+bound10_`i'_`var'_`t'_m) if year==`t'
			replace lb10_`i'_`var'_m=(_b[`var']-bound10_`i'_`var'_`t'_m) if year==`t'
			}
}
}


// Race gaps within Hispanic strata.
// For each outcome, Hispanic definition (hisp1, hisp2), stratum (dummy equal
// to 0 or 1) and race definition (1-3), a weighted OLS of the outcome on the
// black/other dummies is run per year on couples in that stratum with positive
// real income (robust standard errors). Results are written back to that
// year's observations using the same naming scheme as the unconditional race
// analysis, suffixed with _<hispanic dummy>_<stratum>_<race definition>.
// The covariance matrix and the v_*/bound*_* scalars are not indexed by year
// here, so after the loop they hold the values from the last year processed.
// NOTE(review): the year loop starts at 1995 rather than `startyear' -
// confirm this is intentional.

foreach dep of local penalties {
	foreach hvar of local hispanics {
		forvalues hval = 0/1 {
			forvalues def = 1/3 {
				// Create the result variables, all missing until filled per year
				foreach stub in black white other raceblk raceoth ub5bl lb5bl ub10bl lb10bl ub5ot lb5ot ub10ot lb10ot {
					gen `stub'_`dep'_`hvar'_`hval'_`def' = .
				}
				forvalues yr = 1995/`endyear' {
					reg `dep' `x`def'' [pweight=asecfwt] if year == `yr' & `hvar' == `hval' & real_income > 0, robust

					// Point estimates
					replace black_`dep'_`hvar'_`hval'_`def' = _b[black`def'] if year == `yr'
					replace other_`dep'_`hvar'_`hval'_`def' = _b[oth`def'] if year == `yr'
					replace white_`dep'_`hvar'_`hval'_`def' = _b[_cons] if year == `yr'

					// Group levels: reference mean plus the group's gap
					replace raceblk_`dep'_`hvar'_`hval'_`def' = (white_`dep'_`hvar'_`hval'_`def' + black_`dep'_`hvar'_`hval'_`def') if year == `yr'
					replace raceoth_`dep'_`hvar'_`hval'_`def' = (white_`dep'_`hvar'_`hval'_`def' + other_`dep'_`hvar'_`hval'_`def') if year == `yr'

					// CI half-widths from the diagonal of e(V):
					// position 1 = black dummy, position 2 = other dummy
					matrix vv_`dep'_`hvar'_`hval'_`def' = e(V)
					forvalues k = 1/2 {
						scalar v_`dep'_`hvar'_`hval'_`def'_`k' = vv_`dep'_`hvar'_`hval'_`def'[`k',`k']
						scalar bound5_`dep'_`hvar'_`hval'_`def'_`k' = sqrt(v_`dep'_`hvar'_`hval'_`def'_`k')*invttail(e(df_r),0.5*(1-95/100))
						scalar bound10_`dep'_`hvar'_`hval'_`def'_`k' = sqrt(v_`dep'_`hvar'_`hval'_`def'_`k')*invttail(e(df_r),0.5*(1-90/100))
					}

					// Confidence bounds at the 5% and 10% significance levels
					foreach lvl in 5 10 {
						replace ub`lvl'bl_`dep'_`hvar'_`hval'_`def' = (_b[black`def'] + bound`lvl'_`dep'_`hvar'_`hval'_`def'_1) if year == `yr'
						replace lb`lvl'bl_`dep'_`hvar'_`hval'_`def' = (_b[black`def'] - bound`lvl'_`dep'_`hvar'_`hval'_`def'_1) if year == `yr'
						replace ub`lvl'ot_`dep'_`hvar'_`hval'_`def' = (_b[oth`def'] + bound`lvl'_`dep'_`hvar'_`hval'_`def'_2) if year == `yr'
						replace lb`lvl'ot_`dep'_`hvar'_`hval'_`def' = (_b[oth`def'] - bound`lvl'_`dep'_`hvar'_`hval'_`def'_2) if year == `yr'
					}
				}
			}
		}
	}
}


/*  

// Next few lines call on other do files to simply generate the graphs based on the variables created. //

sort year
bysort year: gen obs=_n


do "do\fed_penalties_sample2018_nm.do"


*/


*log close
